{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `spark` is never created in this notebook, so fail fast if it is not\n",
    "# already present as a global.\n",
    "assert \"spark\" in globals()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `sc` is used without being defined in this notebook; check that it\n",
    "# reports a positive default parallelism.\n",
    "assert sc.defaultParallelism > 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from pyspark.sql.types import StringType, FloatType, StructField, StructType\n",
    "\n",
    "# Later cells read d[\"data\"], d[\"target\"], d[\"target_names\"] and\n",
    "# d[\"feature_names\"] from this object.\n",
    "d = load_iris()\n",
    "\n",
    "\n",
    "def make_records(features, label, label_names):\n",
    "    \"\"\"Build one row: every feature as a float, then the label's name as a str.\n",
    "\n",
    "    `label` is used as an index into `label_names`.\n",
    "    \"\"\"\n",
    "    return [float(value) for value in features] + [str(label_names[label])]\n",
    "\n",
    "\n",
    "# One FloatType field per feature name followed by a StringType field\n",
    "# named \"target\"; every field is created with nullable=False.\n",
    "col_types = [\n",
    "    StructField(fname, FloatType(), False) for fname in d[\"feature_names\"]\n",
    "] + [StructField(\"target\", StringType(), False)]\n",
    "schema = StructType(col_types)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each feature row with its label, turn every pair into one record,\n",
    "# and build the DataFrame with the explicit schema defined above.\n",
    "df = spark.createDataFrame(\n",
    "    [\n",
    "        make_records(row, target, d[\"target_names\"])\n",
    "        for row, target in zip(d[\"data\"], d[\"target\"])\n",
    "    ],\n",
    "    schema,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The DataFrame built from the dataset must contain exactly 150 rows.\n",
    "assert df.count() == 150"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the complete lists instead of zipping them: zip() stops at the\n",
    "# shorter input, so a missing or extra column would otherwise go unnoticed\n",
    "# (and an empty column list would pass trivially).\n",
    "expected_columns = list(d[\"feature_names\"]) + [\"target\"]\n",
    "assert df.columns == expected_columns, (df.columns, expected_columns)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
